In [1]:
# File system routine for colab
import os, sys

my_path = "EPFLectures2/DataViz/happiviz/" # need to add folder to your drive and change this when you execute
IN_COLAB = 'google.colab' in sys.modules

class FileSystem:
  def __init__(self, colab_dir=my_path, local_dir="./", data_dir="data"):
    if (IN_COLAB):
      from google.colab import drive
      drive.mount('/gdrive')
      self.root_dir = os.path.join("/gdrive/My Drive/", colab_dir)
    else:
      self.root_dir = local_dir
    self.data_dir = data_dir
    self.change_directory = False

  def data_path(self, name):
    return os.path.join(self.data_dir, name) if self.change_directory else os.path.join(self.root_dir, self.data_dir, name)

  def path(self, name):
    return os.path.join("./", name) if self.change_directory else os.path.join("./", self.root_dir, name)

  def cd(self):
    %cd {self.root_dir}
    %ls
    self.change_directory = True

fs = FileSystem()
fs.cd()
/Users/nodiz/Google Drive/EPFLectures2/DataViz/happiviz/repo
2015.csv                   Icon?
2016.csv                   Preprocessing (1).ipynb
2017.csv                   README.md
2018.csv                   first-preprocessing.ipynb
2019.csv
In [2]:
!pip install plotly
!pip install chart-studio
Requirement already satisfied: plotly in /Users/nodiz/anaconda3/lib/python3.7/site-packages (4.6.0)
Requirement already satisfied: retrying>=1.3.3 in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from plotly) (1.3.3)
Requirement already satisfied: six in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from plotly) (1.12.0)
Requirement already satisfied: chart-studio in /Users/nodiz/anaconda3/lib/python3.7/site-packages (1.1.0)
Requirement already satisfied: plotly in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from chart-studio) (4.6.0)
Requirement already satisfied: six in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from chart-studio) (1.12.0)
Requirement already satisfied: requests in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from chart-studio) (2.22.0)
Requirement already satisfied: retrying>=1.3.3 in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from chart-studio) (1.3.3)
Requirement already satisfied: idna<2.9,>=2.5 in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from requests->chart-studio) (2.8)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from requests->chart-studio) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from requests->chart-studio) (1.24.2)
Requirement already satisfied: certifi>=2017.4.17 in /Users/nodiz/anaconda3/lib/python3.7/site-packages (from requests->chart-studio) (2019.9.11)
In [3]:
# source https://www.kaggle.com/dhanyajothimani/basic-visualization-and-clustering-in-python modified to work on google colab


#Call required libraries
import time                   # To time processes
import warnings               # To suppress warnings

import numpy as np            # Data manipulation
import pandas as pd           # Dataframe manipulatio 
import matplotlib.pyplot as plt                   # For graphics
import seaborn as sns
import chart_studio.plotly as py #For World Map
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

from sklearn.preprocessing import StandardScaler  # For scaling dataset
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation #For clustering
from sklearn.mixture import GaussianMixture #For GMM clustering

import os                     # For os related operations
import sys                    # For data size

def enable_plotly_in_cell():
  import IPython
  from plotly.offline import init_notebook_mode
  display(IPython.core.display.HTML('''<script src="/static/components/requirejs/require.js"></script>'''))
  init_notebook_mode(connected=False)
In [5]:
wh = pd.read_csv("https://raw.githubusercontent.com/com-480-data-visualization/com-480-project-datavaders/master/2017.csv")

wh.describe()
Out[5]:
Happiness.Rank Happiness.Score Whisker.high Whisker.low Economy..GDP.per.Capita. Family Health..Life.Expectancy. Freedom Generosity Trust..Government.Corruption. Dystopia.Residual
count 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000
mean 78.000000 5.354019 5.452326 5.255713 0.984718 1.188898 0.551341 0.408786 0.246883 0.123120 1.850238
std 44.888751 1.131230 1.118542 1.145030 0.420793 0.287263 0.237073 0.149997 0.134780 0.101661 0.500028
min 1.000000 2.693000 2.864884 2.521116 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.377914
25% 39.500000 4.505500 4.608172 4.374955 0.663371 1.042635 0.369866 0.303677 0.154106 0.057271 1.591291
50% 78.000000 5.279000 5.370032 5.193152 1.064578 1.253918 0.606042 0.437454 0.231538 0.089848 1.832910
75% 116.500000 6.101500 6.194600 6.006527 1.318027 1.414316 0.723008 0.516561 0.323762 0.153296 2.144654
max 155.000000 7.537000 7.622030 7.479556 1.870766 1.610574 0.949492 0.658249 0.838075 0.464308 3.117485
In [6]:
print("Dimension of dataset: wh.shape")
wh.dtypes
Dimension of dataset: wh.shape
Out[6]:
Country                           object
Happiness.Rank                     int64
Happiness.Score                  float64
Whisker.high                     float64
Whisker.low                      float64
Economy..GDP.per.Capita.         float64
Family                           float64
Health..Life.Expectancy.         float64
Freedom                          float64
Generosity                       float64
Trust..Government.Corruption.    float64
Dystopia.Residual                float64
dtype: object
In [7]:
wh1 = wh[['Happiness.Score','Economy..GDP.per.Capita.','Family','Health..Life.Expectancy.', 'Freedom', 
          'Generosity','Trust..Government.Corruption.','Dystopia.Residual']] #Subsetting the data
cor = wh1.corr() #Calculate the correlation of the above variables
sns.heatmap(cor, square = True) #Plot the correlation as heat map
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x12147b650>
In [8]:
#Ref: https://plot.ly/python/choropleth-maps/

enable_plotly_in_cell()

data = dict(type = 'choropleth', 
           locations = wh['Country'],
           locationmode = 'country names',
           z = wh['Happiness.Score'], 
           text = wh['Country'],
           colorbar = {'title':'Happiness'})
layout = dict(title = 'Happiness Index 2017', 
             geo = dict(showframe = False, 
                       projection = {'type': 'mercator'}))
choromap3 = go.Figure(data = [data], layout=layout)
iplot(choromap3)
In [9]:
data = dict(type = 'choropleth', 
           locations = wh['Country'],
           locationmode = 'country names',
           z = wh['Happiness.Score'], 
           text = wh['Country'],
           colorbar = {'title':'Happiness'})
layout = dict(title = 'Happiness Index 2017', 
             geo = dict(showframe = False, 
                       projection = {'type': 'mercator'}))
choromap3 = go.Figure(data = [data], layout=layout)
iplot(choromap3)
In [17]:
import plotly.express as px

fig = px.scatter(wh, x="Happiness.Rank", y="Happiness.Score",
           animation_group="Country",
           size="Happiness.Score", color="Country", hover_name="Country")
fig.show()
In [18]:
enable_plotly_in_cell()
import plotly.express as px

fig = px.scatter(wh, x="Happiness.Rank", y="Happiness.Score",
           animation_group="Country",
           size="Happiness.Score", color="Country", hover_name="Country")
fig.show()
In [26]:
"""d2019 = pd.read_csv("https://raw.githubusercontent.com/com-480-data-visualization/com-480-project-datavaders/master/2019.csv")
coltoselect = ['rank', 'country', 'region', 'score', 'gdp_per_capita','healthy_life_expectancy', 'freedom_to_life_choice', 'generosity','corruption_perceptions']

wh = wh.loc[:,coltoselect].copy()
d2019 = d2019.loc[:,coltoselect].copy()

wh["year"] = 2018
d2019["year"] = 2019
finaldf = d2019.append([wh])

fig = px.scatter(finaldf, x="Happiness.Rank", y="Happiness.Score",  animation_frame="year",
           animation_group="Country",
           size="Happiness.Score", color="Country", hover_name="Country")
fig.show()"""
Out[26]:
'd2019 = pd.read_csv("https://raw.githubusercontent.com/com-480-data-visualization/com-480-project-datavaders/master/2019.csv")\ncoltoselect = [\'rank\', \'country\', \'region\', \'score\', \'gdp_per_capita\',\'healthy_life_expectancy\', \'freedom_to_life_choice\', \'generosity\',\'corruption_perceptions\']\n\nwh = wh.loc[:,coltoselect].copy()\nd2019 = d2019.loc[:,coltoselect].copy()\n\nwh["year"] = 2018\nd2019["year"] = 2019\nfinaldf = d2019.append([wh])\n\nfig = px.scatter(finaldf, x="Happiness.Rank", y="Happiness.Score",  animation_frame="year",\n           animation_group="Country",\n           size="Happiness.Score", color="Country", hover_name="Country")\nfig.show()'
In [ ]: